#replace ND with 0
tr <- matrix(data = NA, ncol = ncol(dt[,c(1:46)]), nrow=nrow(dt))
colnames(tr) <- colnames(dt[,c(1:46)])
for (i in 12:46)
{
tr[,c(i)] <- gsub(".*ND.*", 0, dt[,i])
}
for(i in 1:11)
{
tr[,c(i)] <- dt[,c(i)]
}
#transform to dataframe
tr <- as.data.frame.matrix(tr) #A correct command to change the dataset to dataframe after transformations
tr[,12:44] <- sapply(tr[,12:44],as.numeric) # Change a character to numeric (double)
typeof(tr$Cu_concentration) # confirm the value is no longer a character## [1] "double"
head(tr)| Scientific_Name | Group | Plot | Sample_Name | Tube_No | Type_of_Sample | Total_Weight | Cup_No | pXRF_measurement_ID | File | Material | Cl_concentration | Cl_uncertainty | Ca_concentration | Ca_uncertainty | Ti_concentration | Ti_uncertainty | Cr_concentration | Cr_uncertainty | Mn_concentration | Mn_uncertainty | Fe_concentration | Fe_uncertainty | Co_concentration | Co_uncertainty | Ni_concentration | Ni_uncertainty | Cu_concentration | Cu_uncertainty | Zn_concentration | Zn_uncertainty | As_concentration | As_uncertainty | Se_concentration | Se_uncertainty | Cd_concentration | Cd_uncertainty | Re_concentration | Re_uncertainty | Hg_concentration | Hg_uncertainty | Tl_concentration | Tl_uncertainty | Pb_concentration | Pb_uncertainty | Substrate_RT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Allionia incarnata | G3 | P1 | P1_29_1 | 30;31 | leaf | 0.55 | 17 | 2126 | Scan2126_19.14.hdx | Plant | 580 | 268 | 48102 | 704 | 143.0 | 27.5 | 7.7 | 5.5 | 36.2 | 10.0 | 1037 | 33.0 | 0 | 2.5 | 0 | 1.2 | 37.5 | 4.5 | 0.0 | 1.1 | 5.0 | 0.3 | 1.0 | 0.6 | 0.0 | 2.5 | 0.0 | 1.5 | 0 | 0.4 | 0 | 0.6 | 0 | 0.9 | 0.057585236 |
| Allionia incarnata | G3 | P1 | P1_30 | 28 | leaf | 0.166 | 18 | 2127 | Scan2127_19.19.hdx | Plant | 306 | 178 | 22621 | 439 | 132.0 | 22.5 | 4.8 | 4.0 | 47.8 | 10.7 | 1709 | 46.9 | 0 | 3.3 | 0 | 1.2 | 8.1 | 3.2 | 11.8 | 3.8 | 2.5 | 1.5 | 0.0 | 0.6 | 0.0 | 4.7 | 0.0 | 2.1 | 0 | 0.6 | 0 | 0.7 | 0 | 2.5 | 0.014363615 |
| Allionia incarnata | G3 | P1 | P1_29_1_2 | 29 | leaf | 0.213 | 19 | 2128 | Scan2128_19.25.hdx | Plant | 527 | 259 | 47147 | 698 | 124.0 | 25.7 | 10.9 | 6.2 | 25.2 | 9.1 | 866 | 30.8 | 0 | 2.3 | 0 | 1.2 | 35.9 | 5.0 | 0.0 | 1.2 | 6.5 | 0.4 | 0.8 | 0.6 | 0.0 | 3.8 | 0.0 | 1.9 | 0 | 0.5 | 0 | 0.5 | 0 | 1 | 0.034562402 |
| Allionia incarnata | G3 | P2 | P2_E12 | 33;34 | leaf | 0.332 | 20 | 2129 | Scan2129_19.30.hdx | Plant | 2576 | 462 | 37856 | 611 | 146.0 | 26.3 | 10.9 | 6.0 | 30.7 | 10.0 | 1320 | 35.8 | 0 | 2.5 | 0 | 1.2 | 47.1 | 5.1 | 0.0 | 1.8 | 6.2 | 0.4 | 2.6 | 0.7 | 0.0 | 4.5 | 4.6 | 2.6 | 0 | 0.5 | 0 | 0.5 | 0 | 0.8 | 0.054200765 |
| Allionia incarnata | G3 | P2 | P2_E12_1 | 32 | leaf | 0.183 | 21 | 2130 | Scan2130_19.35.hdx | Plant | 4756 | 619 | 29095 | 530 | 90.3 | 20.5 | 10.6 | 5.6 | 0.0 | 5.8 | 668 | 26.6 | 0 | 2.0 | 0 | 1.2 | 26.1 | 4.6 | 0.0 | 1.5 | 4.4 | 1.2 | 2.7 | 1.0 | 20.1 | 20.1 | 6.4 | 3.4 | 0 | 0.7 | 0 | 0.9 | 0 | 1.9 | 0.024824264 |
| Allionia incarnata | G3 | P2 | P2_E12_2 | 26 | leaf | 0.164 | 22 | 2131 | Scan2131_19.39.hdx | Plant | 753 | 234 | 6209 | 233 | 30.6 | 11.5 | 0.0 | 2.5 | 0.0 | 5.1 | 371 | 23.4 | 0 | 2.2 | 0 | 1.5 | 7.9 | 3.2 | 0.0 | 1.7 | 3.3 | 1.6 | 3.6 | 1.5 | 0.0 | 14.2 | 0.0 | 2.9 | 0 | 0.8 | 0 | 1.2 | 0 | 2.8 | 0.011043405 |
#Filtering with tydeverse library
dt_plants <- filter(tr, Scientific_Name != 'QA_Sample')
P1 <- filter(dt_plants, Plot == "P1")
P2 <- filter(dt_plants, Plot == "P2")
P5 <- filter(dt_plants, Plot == "P5")
P6 <- filter(dt_plants, Plot == "P6")
P125 <- filter(dt_plants, Plot != "P6")
Se_best <- subset(dt_plants, Scientific_Name == 'Isocoma cf. tenuisecta' | Scientific_Name == 'Populus fremontii' | Scientific_Name == 'Senegalia (Acacia) greggii' )
Re_best <- subset(dt_plants, Scientific_Name == 'Isocoma cf. tenuisecta' | Scientific_Name == 'Baccharis sarothroides' | Scientific_Name == 'Senegalia (Acacia) greggii'| Scientific_Name == 'Nultuma (Prosopis) velutina' | Scientific_Name == 'Mimosa biuncifera (=aculeaticarpa)' | Scientific_Name == 'Fraxinus velutina'| Scientific_Name == 'Datura wrightii' )
# Dropping uncertainty columns for PCA analysis
dt_plants_nounc = select(dt_plants, -Cl_uncertainty,-Ca_uncertainty, -Ti_uncertainty,
-Cr_uncertainty, -Mn_uncertainty, -Fe_uncertainty, -Ni_uncertainty, -Cu_uncertainty,
-Zn_uncertainty, -As_uncertainty, -Se_uncertainty, -Cd_uncertainty, -Re_uncertainty, -Hg_uncertainty, -Co_uncertainty,
-Tl_uncertainty, -Pb_uncertainty, -Substrate_RT)
dt_plants_nounc = select(dt_plants_nounc, -Hg_concentration, -Tl_concentration, -Pb_concentration, -Ni_concentration, -Co_concentration)
#Filtering plants By Plot with subset function
dt_plants_nounc1 <- subset(dt_plants_nounc, Plot=="P1")
dt_plants_nounc2 <- subset(dt_plants_nounc, Plot=="P2")
dt_plants_nounc5 <- subset(dt_plants_nounc, Plot=="P5")
dt_plants_nounc6 <- subset(dt_plants_nounc, Plot=="P6")
dt_plants_nounce15 <- subset(dt_plants_nounc, Plot=="P1" | Plot=="P5")
dt_plants_nounce125 <- subset(dt_plants_nounc, Plot=="P1" | Plot=="P5" | Plot=="P2")
#Removing _concentration from column names
colnames(dt_plants_nounce125)[12] <- "Cl"
colnames(dt_plants_nounce125)[13] <- "Ca"
colnames(dt_plants_nounce125)[14] <- "Ti"
colnames(dt_plants_nounce125)[15] <- "Cr"
colnames(dt_plants_nounce125)[16] <- "Mn"
colnames(dt_plants_nounce125)[17] <- "Fe"
colnames(dt_plants_nounce125)[18] <- "Cu"
colnames(dt_plants_nounce125)[19] <- "Zn"
colnames(dt_plants_nounce125)[20] <- "As"
colnames(dt_plants_nounce125)[21] <- "Se"
colnames(dt_plants_nounce125)[22] <- "Cd"
colnames(dt_plants_nounce125)[23] <- "Re"
colnames(dt_plants_nounc6)[12] <- "Cl"
colnames(dt_plants_nounc6)[13] <- "Ca"
colnames(dt_plants_nounc6)[14] <- "Ti"
colnames(dt_plants_nounc6)[15] <- "Cr"
colnames(dt_plants_nounc6)[16] <- "Mn"
colnames(dt_plants_nounc6)[17] <- "Fe"
colnames(dt_plants_nounc6)[18] <- "Cu"
colnames(dt_plants_nounc6)[19] <- "Zn"
colnames(dt_plants_nounc6)[20] <- "As"
colnames(dt_plants_nounc6)[21] <- "Se"
colnames(dt_plants_nounc6)[22] <- "Cd"
colnames(dt_plants_nounc6)[23] <- "Re"Cu_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Cu_concentration, FUN = median), y = Cu_concentration, group=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
#theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 0))+
geom_jitter(aes(colour = Plot), size=1) +
ylim(0,600)+
coord_flip()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Cu_AllPlotsRe_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Re_concentration, FUN = median), y = Re_concentration, group=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
#theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 0))+
geom_jitter(aes(colour = Plot), size=1) +
#ylim(0,600)+
coord_flip()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Re_AllPlotsRe_box <- ggplot(Re_best, aes(x = reorder(Scientific_Name, Re_concentration, FUN = median), y = Re_concentration, fill=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 45))+
geom_jitter(color="#85b8bc", size=2, alpha=0.9) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
#scale_fill_manual(values = c("", "", "", "", "", "","" ))
scale_fill_manual(values = c("#4b2866", "#c7abdd", "#a578c9", "#381e4c", "#8347b2", "#5d327f","#251433" ))
#scale_fill_brewer(palette = "Greens")
Re_boxZn_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Zn_concentration, FUN = median), y = Zn_concentration, group=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
#theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 0))+
geom_jitter(aes(colour = Plot), size=1) +
#ylim(0,600)+
coord_flip()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Zn_AllPlotsSe_AllPlots<- ggplot(dt_plants, aes(x = reorder(Scientific_Name, Se_concentration, FUN = median), y = Se_concentration, group=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
#theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 0))+
geom_jitter(aes(colour = Plot), size=1) +
ylim(0,60)+
coord_flip()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E"))
Se_AllPlotsSe_box <- ggplot(Se_best, aes(x = reorder(Scientific_Name, Se_concentration, FUN=median), y = Se_concentration, fill=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 45))+
geom_jitter(color="#85b8bc", size=3, alpha=0.9) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_fill_manual(values = c("#251433", "#c7abdd", "#8347b2"))
Se_boxPlants collected at the plot 6 were growing directly on the mine tailings that were exposed on the area of 100 x 100 m. Shrubs were also collected in the close vicinity to the tailings given their rooting depths.
Plot 6
Cu_Plot6 <- ggplot(P6, aes(x = reorder(Scientific_Name, Cu_concentration, FUN = median), y = Cu_concentration, group=Scientific_Name)) +
geom_boxplot()+theme_classic()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),axis.title.x=element_blank())+
#theme(legend.position = "none")+
scale_x_discrete(guide = guide_axis(angle = 0))+
geom_jitter(aes(colour = Plot), size=1.6) +
ylim(0,600)+
coord_flip()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#scale_fill_manual(values = c("#38A6A5", "#73AF48", "#EDAD08", "#CC503E", "#38A6A5", "#73AF48", "#EDAD08", "#CC503E", "#38A6A5", "#73AF48", "#EDAD08"))
Cu_Plot6require(stats)
myPr1 <- prcomp(dt_plants_nounc1[,12:23], scale=TRUE)
myPr2 <- prcomp(dt_plants_nounc2[,12:23], scale=TRUE)
myPr5 <- prcomp(dt_plants_nounc5[,12:23], scale=TRUE)
myPr6 <- prcomp(dt_plants_nounc6[,12:23], scale=TRUE)
myPr15 <- prcomp(dt_plants_nounce15[,12:23], scale=TRUE)
myPr125 <- prcomp(dt_plants_nounce125[,12:23], scale=TRUE) # it was not working because the scale was FALSEbiplot(myPr1, scale=0)biplot(myPr125, scale=0)biplot125 <- biplot(myPr125,
col=c('blue', 'red'),
cex=c(0.8, 0.8),
xlim=c(-.4, .4),
main='PCA Results',
expand=1.2)biplot6 <- biplot(myPr6,
col=c('blue', 'red'),
cex=c(0.8, 0.8),
xlim=c(-.4, .4),
main='PCA Results',
expand=1.2)dt_plants1 <- cbind(dt_plants_nounc1, myPr1$x[,1:2])
dt_plants2 <- cbind(dt_plants_nounc2, myPr2$x[,1:2])
dt_plants5 <- cbind(dt_plants_nounc5, myPr5$x[,1:2])
dt_plants6 <- cbind(dt_plants_nounc6, myPr6$x[,1:2])
dt_plants15 <- cbind(dt_plants_nounce15, myPr15$x[,1:2])# Plot for all plot
myPr_all <- prcomp(dt_plants_nounc[,12:23], scale=TRUE)
dt_plants_all <- cbind(dt_plants_nounc, myPr_all$x[,1:2])
ggplot(dt_plants_all, aes(PC1, PC2, col=Plot, fill=Plot))+
stat_ellipse(geom="polygon", col="black", alpha=0.5)+
theme_classic()+
geom_point(shape=21, col="black")plot(myPr125, type="l")summary(myPr1)## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7892 1.6134 1.2723 1.09515 1.00262 0.8604 0.71435
## Proportion of Variance 0.2668 0.2169 0.1349 0.09995 0.08377 0.0617 0.04252
## Cumulative Proportion 0.2668 0.4837 0.6186 0.71853 0.80230 0.8640 0.90652
## PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.63622 0.59865 0.4436 0.30908 0.25739
## Proportion of Variance 0.03373 0.02987 0.0164 0.00796 0.00552
## Cumulative Proportion 0.94025 0.97012 0.9865 0.99448 1.00000
library(readr)
library(dplyr)
library(tidyr)##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:reshape':
##
## expand, smiths
## The following object is masked from 'package:reshape2':
##
## smiths
library(ropls)
dt_plants_nounc_3 <- dt_plants_nounc |> select(-Scientific_Name, -Group, -Plot, -Sample_Name, -Tube_No, -Type_of_Sample, -Cup_No, -pXRF_measurement_ID, -File, -Material)
typeof(dt_plants_nounc_3$Total_Weight)## [1] "character"
dt_plants_nounc_3[,1] <- sapply(dt_plants_nounc_3[,1],as.numeric)
dt_nounc_PCA <- opls(x=dt_plants_nounc_3)## PCA
## 226 samples x 13 variables
## standard scaling of predictors
## R2X(cum) pre ort
## Total 0.558 3 0
plot(dt_nounc_PCA)plot(dt_nounc_PCA, typeVc ="x-score", parAsColFcVn=dt_plants_nounc$Plot)dt_opls <-opls(dt_plants_nounc_3, dt_plants_nounc$Plot)## PLS-DA
## 226 samples x 13 variables and 1 response
## standard scaling of predictors and response(s)
## R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total 0.59 0.179 0.122 0.398 4 0 0.05 0.05
summary(dt_opls)## Length Class Mode
## 1 opls S4
plot(dt_nounc_PCA, typeVc ="x-score", parAsColFcVn=dt_plants_nounc$Cu)dt_opls <-opls(dt_plants_nounc_3, dt_plants_nounc$Cu)## PCA
## 226 samples x 13 variables
## standard scaling of predictors
## R2X(cum) pre ort
## Total 0.558 3 0